import pandas as pd
# Parallel accumulators filled by the add_to_* callbacks defined below;
# entry k of each list comes from the same spreadsheet row.
sentence_list1, sentence_list2, score_list = [], [], []

# First workbook to merge (absolute, machine-specific path).
file = pd.read_excel(
    "C:\\Users\\gt\\Desktop\\rokid\\rokid_data\\1.xlsx"
)
def add_to_list1(sentence):
    """Store *sentence* at the end of the module-level ``sentence_list1``.

    Intended as a per-element callback (it is passed to ``Series.apply``
    in this script); it returns ``None``.
    """
    sentence_list1.extend([sentence])

def add_to_list2(sentence):
    """Store *sentence* at the end of the module-level ``sentence_list2``.

    Intended as a per-element callback (it is passed to ``Series.apply``
    in this script); it returns ``None``.
    """
    sentence_list2.extend([sentence])

def add_to_score(score):
    """Store *score* at the end of the module-level ``score_list``.

    Intended as a per-element callback (it is passed to ``Series.apply``
    in this script); it returns ``None``. The value is stored as-is, with
    no conversion.
    """
    score_list.extend([score])


# Which callback collects which column of a workbook.
_collectors = (
    ("query_1", add_to_list1),
    ("query_2", add_to_list2),
    ("score", add_to_score),
)

# Workbooks still to be read; workbook 1 is already loaded into `file`.
_remaining_workbooks = (
    "C:\\Users\\gt\\Desktop\\rokid\\rokid_data\\2.xlsx",
    "C:\\Users\\gt\\Desktop\\rokid\\rokid_data\\3.xlsx",
    "C:\\Users\\gt\\Desktop\\rokid\\rokid_data\\4.xlsx",
    "C:\\Users\\gt\\Desktop\\rokid\\rokid_data\\5.xlsx",
)

# Drain the workbook that is already in memory ...
for _column, _collect in _collectors:
    file[_column].apply(_collect)

# ... then read and drain each remaining workbook in turn. `file` is rebound
# on every pass, so it ends up referring to the last workbook.
for _workbook in _remaining_workbooks:
    file = pd.read_excel(_workbook)
    for _column, _collect in _collectors:
        file[_column].apply(_collect)
# Normalise and filter the three parallel lists in place.
#
# A row is kept only if all of the following hold:
#   * both sentences are strings (a non-string cell, such as the NaN pandas
#     uses for an empty cell, can be neither cleaned nor written out later);
#   * the score converts with int();
#   * the two sentences still differ after cleaning.
#
# Kept rows are collected into a new list and written back afterwards.
# Deleting entries while indexing the same lists made the element that slid
# into the freed slot escape cleaning and validation, and could raise
# IndexError when the last row was removed.


def _strip_noise(text):
    """Return *text* without ASCII spaces and full-width question marks."""
    return text.replace(" ", "").replace("？", "")


len_ = len(sentence_list1)  # row count before filtering
_kept_rows = []
for _first, _second, _raw_score in zip(sentence_list1, sentence_list2, score_list):
    if not (isinstance(_first, str) and isinstance(_second, str)):
        continue
    try:
        _score = int(_raw_score)
    except (TypeError, ValueError, OverflowError):
        # None / unparsable text / NaN / infinity: not a usable score.
        continue
    _first = _strip_noise(_first)
    _second = _strip_noise(_second)
    if _first == _second:
        # Identical pairs are dropped.
        continue
    _kept_rows.append((_first, _second, _score))

# Slice assignment keeps the original list objects, so any other reference
# to them observes the cleaned data.
sentence_list1[:] = [_row[0] for _row in _kept_rows]
sentence_list2[:] = [_row[1] for _row in _kept_rows]
score_list[:] = [_row[2] for _row in _kept_rows]

tmp = len_ - len(sentence_list1)  # number of rows removed


# Write one "<sentence1>\t<sentence2>\t<score>" line per row to data.tsv.
# The context manager guarantees the file is flushed and closed even if a
# write fails; the original handle was never closed.
with open("data.tsv", mode="w", encoding="utf-8") as f:
    for _first, _second, _score in zip(sentence_list1, sentence_list2, score_list):
        # str.join raises TypeError for a non-string sentence, just as
        # f.write() would; only the score is stringified.
        f.write("\t".join((_first, _second, str(_score))) + "\n")